# -*- coding: utf-8 -*-
import logging

import pymongo
from pymongo.errors import DuplicateKeyError
from twisted.internet import reactor, defer


class MongoPipline(object):
    """Item pipeline that stores items in MongoDB without blocking the reactor.

    Each item is written from a reactor thread-pool thread. ``process_item``
    returns a Deferred that fires with the item once the write attempt has
    finished, whether or not the write succeeded (failures are only logged).
    """

    def __init__(self, mongo_uri, mongo_db):
        """Remember the connection settings; no connection is opened here.

        :param mongo_uri: connection URI handed to ``pymongo.MongoClient``.
        :param mongo_db: name of the database to write to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        # Populated by open_spider(). Kept as None until then so that
        # close_spider() is safe even if open_spider() never ran or failed.
        self.client = None
        self.mongodb = None
        self.logger = logging.getLogger(__name__)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler settings.

        Reads ``MONGO_URI`` (defaulting to a local server) and ``DB_NAME``.

        :param crawler: object exposing a ``settings`` mapping-like attribute.
        :return: a new pipeline instance.
        """
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI', 'mongodb://127.0.0.1:27017/'),
            mongo_db=crawler.settings.get('DB_NAME'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the database when the spider starts.

        :param spider: the spider being opened (unused).
        :return: None
        """
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.mongodb = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client when the spider finishes.

        :param spider: the spider being closed (unused).
        :return: None
        """
        # Guard against open_spider() never having created the client.
        if self.client is not None:
            self.client.close()

    @defer.inlineCallbacks
    def process_item(self, item, spider):
        """Schedule the item to be written in a worker thread.

        :param item: the item to store; must provide
            ``get_db_collection_unique_key()``.
        :param spider: the spider that produced the item.
        :return: a Deferred that fires with ``item`` after the write attempt.
        """
        out = defer.Deferred()
        reactor.callInThread(self._insert, item, out, spider)
        yield out
        defer.returnValue(item)

    def _insert(self, item, out, spider):
        """Insert the item, or update the existing document on a duplicate key.

        Runs in a reactor thread-pool thread. ``out`` is always fired (from
        the reactor thread) with ``item``, even when the write fails, so the
        Deferred awaited by ``process_item`` can never be left pending.

        :param item: the item to store.
        :param out: Deferred to fire once the write attempt is finished.
        :param spider: the spider that produced the item (unused).
        :return: None
        """
        try:
            # Looked up inside the try block: a failure here must still
            # release ``out`` instead of stalling the item forever.
            collection, unique_key = item.get_db_collection_unique_key()
            try:
                self.mongodb[collection].insert_one(dict(item))
                self.logger.info('插入成功')
            except DuplicateKeyError:
                # A document with this unique key already exists: overwrite
                # its fields with the values of the new item.
                self.mongodb[collection].update_one(
                    {unique_key: item[unique_key]},
                    {'$set': dict(item)},
                )
                self.logger.info('更新成功')
        except Exception:
            # Best effort: report the failure (with traceback) and still pass
            # the item on to the rest of the pipeline.
            self.logger.exception('插入失败')
        finally:
            # Deferreds are not thread-safe, so fire it from the reactor thread.
            reactor.callFromThread(out.callback, item)

